# -*- coding: utf-8 -*-
import scrapy
from osm.items import OsmItem

# Run with: scrapy crawl osm_spider

class DoubanSpiderSpider(scrapy.Spider):
    """Scrape table rows of the OpenStreetMap wiki "Map Features" page.

    Every table row that yields at least one value is emitted as an
    ``OsmItem`` with the fields ``key``, ``keyHref``, ``valueHref``,
    ``value`` and ``description``.

    NOTE(review): the class name looks like a leftover from a Douban spider
    template; it is kept unchanged so existing references keep working.
    """

    # Spider name, i.e. the argument passed to ``scrapy crawl``.
    name = 'osm_spider'
    # Domains the spider is allowed to crawl.
    allowed_domains = ['wiki.openstreetmap.org']
    start_urls = ['https://wiki.openstreetmap.org/wiki/Map_Features']

    # Item field -> XPath evaluated relative to a single <tr> element.
    # Only the first match of each expression is used, so for
    # ``description`` that is the first text node of the fourth cell.
    _FIELD_XPATHS = {
        "key": ".//td[1]/a/text()",
        "keyHref": ".//td[1]/a/@href",
        "valueHref": ".//td[2]/a/@href",
        "value": ".//td[2]/a/text()",
        "description": ".//td[4]/text()",
    }

    def parse(self, response):
        """Yield one ``OsmItem`` per table row found in ``response``.

        Args:
            response: The downloaded page; only its ``xpath`` method is used.

        Yields:
            OsmItem: An item whose fields hold the first match of the
            corresponding XPath, or ``None`` where nothing matched. Rows in
            which no field matched at all (for example header rows that
            contain no ``<td>`` cells) are skipped.
        """
        # Match rows both with and without an explicit <tbody>: the element
        # is optional in HTML source and the page is parsed as served.
        rows = response.xpath("//table/tbody/tr | //table/tr")
        for row in rows:
            extracted = {
                field: row.xpath(query).extract_first()
                for field, query in self._FIELD_XPATHS.items()
            }
            # A row that produced nothing carries no data worth exporting.
            if all(value is None for value in extracted.values()):
                continue

            osm_item = OsmItem()
            for field, value in extracted.items():
                osm_item[field] = value
            # Use the spider's logger rather than print() so the output
            # honours the configured log level and handlers.
            self.logger.debug("Parsed row: %s", osm_item)
            yield osm_item


